Libraries

# Global chunk options: show the code in the rendered report, but hide
# warnings and messages emitted while the chunks run.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# Packages used by the report. Attach order determines which package wins
# when two of them export the same name, so keep the order as is.
library(ggplot2)
library(tidyr)
library(dplyr)
library(tibble)
library(plotly)
library(ggcorrplot)

# Fix the random seed so any randomised step is reproducible between runs.
# NOTE(review): no random operation is visible in this part of the report;
# confirm the seed is still needed further down.
set.seed(1)

Import data

# Load the battery dataset from disk and discard every row that contains
# at least one missing value.
data <- drop_na(
  read.csv(file = "data/mp_batteries.csv", header = TRUE, sep = ",")
)

# Preview the first ten records as a formatted table.
data %>%
  head(10) %>%
  knitr::kable()
Battery.ID Battery.Formula Working.Ion Formula.Charge Formula.Discharge Max.Delta.Volume Average.Voltage Gravimetric.Capacity Volumetric.Capacity Gravimetric.Energy Volumetric.Energy Atomic.Fraction.Charge Atomic.Fraction.Discharge Stability.Charge Stability.Discharge Steps Max.Voltage.Step
mp-30_Al Al0-2Cu Al Cu Al2Cu 3.0433992 0.0890331 1368.48055 5562.7901 121.840086 495.272533 0.0000000 0.6666667 0.0000000 0.0000000 1 0
mp-1022721_Al Al1-3Cu Al AlCu Al3Cu 1.2436528 -0.0215863 1112.93655 4418.9798 -24.024232 -95.389622 0.5000000 0.7500000 0.0740612 0.0962458 1 0
mp-8637_Al Al0-5Mo Al Mo Al5Mo 4.7625743 0.1227568 1741.50416 7175.7017 213.781556 880.866507 0.0000000 0.8333333 0.4114601 0.0452120 1 0
mp-129_Al Al0-12Mo Al Mo Al12Mo 12.7238931 0.0431214 2298.81076 7346.2323 99.128013 316.780060 0.0000000 0.9230769 0.0000000 0.0114456 1 0
mp-91_Al Al0-12W Al W Al12W 12.4945977 0.0292342 1900.74513 7332.7186 55.566774 214.366205 0.0000000 0.9230769 0.0000000 0.0000000 1 0
mp-1055908_Al Al0-12Mn Al Mn MnAl12 18.2361563 0.0397314 2547.69280 7592.9161 101.223298 301.676876 0.0000000 0.9230769 0.1454643 0.0000000 1 0
mp-2658_Al Al0-1Fe Al Fe AlFe 0.7711539 0.4717287 970.75702 5622.3562 457.933974 2652.226958 0.0000000 0.5000000 0.7613994 0.0000000 1 0
mp-16722_Al Al1-10.25V Al Al10V Al41V4 0.0027108 -0.0155827 61.37701 176.4151 -0.956421 -2.749028 0.9090909 0.9111111 0.0118097 0.0125861 1 0
mp-998981_Al Al1-3Ti Al TiAl TiAl3 0.9562924 0.1602450 1248.40362 4248.4211 200.050419 680.788169 0.5000000 0.7500000 0.1415912 0.0244962 1 0
mp-8633_K K0-3Cr K Cr K3Cr 15.8029363 -0.7487069 474.94813 667.5593 -355.596958 -499.806269 0.0000000 0.7500000 0.4025263 0.6621618 1 0
  • Battery.ID: character - Unique battery identifier
  • Battery.Formula: character - Chemical formula of battery
  • Working.Ion: character - Primary working ion
  • Formula.Charge: character - Formula in charged state
  • Formula.Discharge: character - Formula in discharged state
  • Max.Delta.Volume: numeric - Max volume change (%)
  • Average.Voltage: numeric - Average operating voltage
  • Gravimetric.Capacity: numeric - Capacity per unit mass
  • Volumetric.Capacity: numeric - Capacity per unit volume
  • Gravimetric.Energy: numeric - Energy per unit mass
  • Volumetric.Energy: numeric - Energy per unit volume
  • Atomic.Fraction.Charge: numeric - Atomic fraction (charged)
  • Atomic.Fraction.Discharge: numeric - Atomic fraction (discharged)
  • Stability.Charge: numeric - Stability in charged state
  • Stability.Discharge: numeric - Stability in discharged state
  • Steps: integer - Number of transition steps
  • Max.Voltage.Step: numeric - Largest voltage difference

Basic statistics

# Number of rows left after the incomplete records were dropped.
summarise(data, n())
##    n()
## 1 4351
# Descriptive statistics for every numeric column, rendered as a table with
# one row per column and one table column per statistic.
#
# across() produces one value per (column, statistic) pair, named
# "<column>_<statistic>". The names are split back apart with a regular
# expression rather than a plain "_" separator, because the statistic name
# "n_unique" itself contains an underscore: splitting on "_" would cut it
# down to "n" (the discard warning is hidden by the global chunk options).
# The greedy first group combined with the end-anchored list of statistic
# names also keeps column names containing underscores intact.
data %>%
  summarise(across(where(is.numeric),
                   list(mean = mean,
                        median = median,
                        min = min,
                        max = max,
                        sd = sd,
                        n_unique = n_distinct),
                   .names = "{.col}_{.fn}")) %>%
  pivot_longer(everything(),
               # Order follows the "<column>_<statistic>" layout above.
               names_to = c("column", "statistic"),
               names_pattern = "^(.*)_(mean|median|min|max|sd|n_unique)$",
               values_to = "value") %>%
  pivot_wider(names_from = statistic, values_from = value) %>%
  knitr::kable()
statistic mean median min max sd n
Max.Delta.Volume 0.3753137 0.0420271 0.0000162 2.931932e+02 6.8518375 4342
Average.Voltage 3.0831427 3.3005818 -7.7547512 5.456883e+01 1.8220562 4351
Gravimetric.Capacity 158.2908894 130.6909797 5.1765430 2.557627e+03 164.9136411 3330
Volumetric.Capacity 610.6240987 507.0312049 24.0790699 7.619191e+03 563.8531258 4342
Gravimetric.Energy 444.1063802 401.7876573 -583.5458444 5.926950e+03 351.0481297 4351
Volumetric.Energy 1664.0484137 1463.7877150 -2208.0745659 1.830590e+04 1297.7985678 4351
Atomic.Fraction.Charge 0.0398558 0.0000000 0.0000000 9.090909e-01 0.0885604 126
Atomic.Fraction.Discharge 0.1590772 0.1428571 0.0074074 9.933333e-01 0.1203743 192
Stability.Charge 0.1425666 0.0731920 0.0000000 6.487098e+00 0.3782776 3050
Stability.Discharge 0.1220717 0.0487845 0.0000000 6.277809e+00 0.3523182 3933
Steps 1.1670880 1.0000000 1.0000000 6.000000e+00 0.4637496 6
Max.Voltage.Step 0.1502897 0.0000000 0.0000000 2.696069e+01 0.6300680 600

Data distributions

# Histogram of every numeric column, one facet per column.
# The data is reshaped to long form (attribute, value) so that a single
# ggplot call can draw all facets. pivot_longer() replaces the superseded
# gather(); only the row order of the long table differs, which does not
# affect the binned counts.
data %>%
  select(where(is.numeric)) %>%
  pivot_longer(everything(), names_to = "attribute", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  # Free scales: the columns span very different ranges.
  facet_wrap(~attribute, scales = "free") +
  theme_minimal() +
  labs(title = "Distribution of values", x = "Value", y = "Frequency")

Correlation analysis

# Pairwise correlations between all numeric columns, computed on complete
# observations only.
numeric_columns <- select(data, where(is.numeric))
cor_matrix <- numeric_columns %>%
  cor(use = "complete.obs")

# Flatten the matrix to long form: one row per pair of variables (Var1,
# Var2) with the coefficient stored in Freq.
cor_data <- cor_matrix %>%
  as.table() %>%
  as.data.frame()

# Interactive heatmap; the colour scale is pinned to [-1, 1].
layout(
  plot_ly(
    data = cor_data,
    x = ~Var1,
    y = ~Var2,
    z = ~Freq,
    zmin = -1,
    zmax = 1,
    type = "heatmap",
    colors = c("blue", "white", "red")
  ),
  title = "Correlation matrix",
  xaxis = list(title = ""),
  yaxis = list(title = "")
)